# Install fastDummies only when it is not already available, so that
# re-running the script does not download and reinstall the package each time.
if (!requireNamespace("fastDummies", quietly = TRUE)) {
  install.packages("fastDummies")
}
library(fastDummies)
library(tidyverse)  # ggplot(), %>%, mutate(), and friends 
library(broom)
library(MatchIt)  # Match things
library(Rcpp)
library(MASS)
library(modelsummary)
library(IRdisplay)
library(haven)
library(sjmisc)
library(dplyr)

# Show the current working directory: the CSV files read and written below
# use relative paths, so they are resolved against this directory.
getwd()

# ---- Shared preparation helpers ----------------------------------------
# The "equal" and "discordant" datasets go through exactly the same steps,
# so the steps are defined once here and applied to each file below.

#' Read a raw matching dataset and flag homicides sharing an agency-month.
#'
#' @param path Path to the raw CSV file (read with `read_csv()`).
#' @return The data with `City` renamed to `Agency` and four added columns:
#'   `Year2`, `Code_Overlap`, `Code_Overlap_Agency`, `Monthly_Agency_Overlap`.
load_with_overlap_flags <- function(path) {
  df <- read_csv(path)
  df <- dplyr::rename(df, Agency = City)

  # `Year` is dropped before export; `Year2` is the copy that is kept.
  df$Year2 <- df$Year
  df$Code_Overlap <- paste(
    df$State, df$Agency, df$Year2, df$Month,
    sep = "_"
  )
  df$Code_Overlap_Agency <- paste(
    df$State, df$Agency, df$Agentype, df$Year2, df$Month,
    sep = "_"
  )

  # duplicated() is FALSE for the first row of each key and TRUE for every
  # later row with the same key, so the first homicide of an agency-month
  # is coded 0 and only the subsequent ones are coded 1.
  # NOTE(review): confirm that leaving the first occurrence unflagged is
  # the intended definition of "overlap".
  df$Monthly_Agency_Overlap <- as.numeric(duplicated(df$Code_Overlap_Agency))
  df
}

#' Add the derived covariates and dummy columns used for matching.
#'
#' Shifts the victim/offender counts by one, adds `Decade`, `FiveY` and
#' `AgeCat`, removes rows whose victim age is missing or outside 0-99, and
#' expands `VicRace` and `Solved` into dummy columns.
#'
#' @param df Data returned by `load_with_overlap_flags()`.
#' @return The prepared data, with `VicRace` and `Solved` replaced by their
#'   dummy columns.
derive_analysis_columns <- function(df) {
  # Add one to the victim and offender counts.
  # NOTE(review): presumably the raw fields count *additional* victims and
  # offenders, so +1 yields the totals -- confirm against the codebook.
  df$VicCount <- df$VicCount + 1
  df$OffCount <- df$OffCount + 1

  # Compare against numeric literals: comparing with strings such as '2000'
  # forces a lexicographic comparison, which is only accidentally correct
  # for four-digit years and depends on the locale's collation order.
  df <- df %>% mutate(
    Decade = case_when(
      Year >= 2000 & Year < 2010 ~ "00s",
      Year >= 2010 & Year <= 2020 ~ "10s",
      TRUE ~ "undetermined"
    ),
    FiveY = case_when(
      Year >= 2005 & Year < 2010 ~ "2005-2009",
      Year >= 2010 & Year < 2015 ~ "2010-2014",
      Year >= 2015 & Year <= 2020 ~ "2015-2020",
      TRUE ~ "undetermined"
    )
  )

  df <- dplyr::rename(df, VicAge = Victim_Age)

  # Age bands: [0, 5], (5, 10], ..., (90, 95], (95, 99], labelled
  # "0-5", "6-10", ..., "91-95", "96-99".
  age_upper <- c(seq(5, 95, by = 5), 99)
  age_lower <- c(0, age_upper[-length(age_upper)] + 1)
  age_band <- cut(
    df$VicAge,
    breaks = c(0, age_upper),
    labels = paste0(age_lower, "-", age_upper),
    include.lowest = TRUE,
    right = TRUE
  )
  df$AgeCat <- as.character(age_band)

  # Ages that are missing or outside 0-99 (e.g. the 999 code) fall in no
  # band and come back as NA; those inconsistent observations are removed.
  df <- df[!is.na(df$AgeCat), ]

  # Create dummies for the relevant columns; the originals are removed.
  fastDummies::dummy_cols(
    df,
    select_columns = c("VicRace", "Solved"),
    remove_selected_columns = TRUE
  )
}

#' Drop the columns that are not part of the analysis-ready export.
#'
#' @param df Data returned by `derive_analysis_columns()`.
#' @return `df` without the listed columns; errors if any of them is absent.
drop_export_columns <- function(df) {
  unused_columns <- c(
    "Year", "FileDate", "MSA", "Code_Overlap", "Code_Overlap_Agency",
    "VicAge", "Solved_No", "CNTYFIPS", "Ori", "Agency", "Source",
    "StateName", "Incident", "ActionType", "Situation", "VicEthnic",
    "OffAge", "OffSex", "OffRace", "OffEthnic", "Relationship", "Subcircum"
  )
  # dplyr::select is namespaced because MASS (attached above) masks select().
  dplyr::select(df, -dplyr::all_of(unused_columns))
}


################################
################################
################################
# E Q U A L  M A P  A N D  W P #
################################
################################
################################

equal <- load_with_overlap_flags(
  'robustness_matching_equal_wp_map_dataset_with_ses.csv'
)
table(equal$Monthly_Agency_Overlap) # check number of overlapping homicides

equal <- derive_analysis_columns(equal)

# drop irrelevant columns
equal2 <- drop_export_columns(equal)

# export to csv
write.csv(equal2, "dataset_map_equal_ready_with_ses.csv", row.names = TRUE)


################################
################################
################################
##### D I S C O R D A N T  #####
################################
################################
################################

discordant <- load_with_overlap_flags(
  'robustness_matching_discordant_wp_map_dataset_with_ses.csv'
)
table(discordant$Monthly_Agency_Overlap) # check number of overlapping homicides

discordant <- derive_analysis_columns(discordant)

# drop irrelevant columns
discordant2 <- drop_export_columns(discordant)

# export to csv
write.csv(
  discordant2,
  "dataset_map_discordant_ready_with_ses.csv",
  row.names = TRUE
)